import os
import pandas as pd

# Build an augmented training CSV: the cleaned training rows plus a seeded
# random subset of rows taken from a test-set CSV.

SEED = 2021  # random_state for the sampling step, so the subset is reproducible
SAMPLE_FRACTION = 0.7  # share of the test-set rows appended to the training rows
dataset_path = '../../Dataset'
train_path = os.path.join(dataset_path, 'train_clean.csv')
test_path = os.path.join(dataset_path, 'test_8872.csv')  # highest score 0.8872

df1 = pd.read_csv(train_path)

# NOTE(review): test_8872.csv presumably holds model predictions for the test
# images (used here as pseudo-labels) -- confirm with whoever produced it.
df2 = pd.read_csv(test_path)

# Rename the test file's columns to 'image' / 'label'.
# NOTE(review): assumes these are the column names train_clean.csv uses, so
# the concat below lines the columns up -- confirm.
df2 = df2.rename(columns={'image_id': 'image', 'category_id': 'label'})

# Prefix every image entry with 'test/'.
df2['image'] = df2['image'].apply(lambda x: 'test/' + x)

# Derive the sample size from the rows actually loaded instead of a
# hard-coded row count: a file of a different size would otherwise either
# raise in sample() (too few rows) or silently yield a different fraction.
# int() truncates, matching the previous int(6408 * 0.7) == 4485 result for
# a 6408-row file.
n_sample = int(len(df2) * SAMPLE_FRACTION)
df2_sample = df2.sample(n_sample, random_state=SEED)  # 70%

df3 = pd.concat((df1, df2_sample), axis=0)

# The '70' in the output file name reflects SAMPLE_FRACTION; keep the two in
# sync if the fraction is ever changed.
df3.to_csv(os.path.join(dataset_path, 'train_test_70.csv'), index=False)
print('done!')
